# -*- coding: UTF-8 -*-

# Custom HTML parsing helper: subclass HTMLParser and override its handle_* callback methods.

from html.parser import HTMLParser


class myHtmlparser(HTMLParser):
    """Collect attribute values or text content for one tag name.

    Configure the parser with :meth:`init`, push markup through ``feed()``
    and read the collected items with :meth:`get_values`.

    HTMLParser reports tag and attribute names in lower case, so
    ``tag_name`` and ``attr_name`` should be given in lower case as well.
    """

    # Accepted values for ``value_type``.
    ATTR_VALUE = 1  # collect the value of ``attr_name`` on ``tag_name`` tags
    INNER_TEXT = 2  # collect the text data seen after a ``tag_name`` start tag

    # Where the wanted data lives: 1 - attribute value, 2 - text content.
    value_type = None
    # Name of the tag to capture from.
    tag_name = None
    # Name of the attribute to capture (used when value_type == 1).
    attr_name = None

    def __init__(self, *args, **kwargs):
        """Create the parser; arguments are forwarded to HTMLParser."""
        super().__init__(*args, **kwargs)
        # Per-instance result list. A class-level list would be shared by
        # every parser instance and leak results between them.
        self.value = []

    def init(self, value_type, tag_name, attr_name):
        """Select what to capture.

        Args:
            value_type: 1 to collect attribute values, 2 to collect text.
            tag_name: name of the tag to capture from.
            attr_name: name of the attribute to collect; only consulted
                when ``value_type`` is 1.
        """
        self.tag_name = tag_name
        self.attr_name = attr_name
        self.value_type = value_type

    def handle_starttag(self, tag, attrs):
        """Record the value of ``attr_name`` for every ``tag_name`` start tag."""
        if self.value_type != self.ATTR_VALUE:
            return
        if tag != self.tag_name:
            return
        # ``attrs`` is a list of (name, value) pairs; an attribute written
        # without a value is reported with value None and recorded as such.
        for name, value in attrs:
            if name == self.attr_name:
                self.value.append(value)

    def handle_data(self, data):
        """Record text data whose most recent start tag is ``tag_name``.

        ``lasttag`` is maintained by HTMLParser as the name of the last
        start tag that was parsed.
        NOTE(review): it does not appear to be reset on end tags, so text
        following a closing ``tag_name`` tag is collected too - confirm
        whether callers rely on that.
        """
        if self.value_type != self.INNER_TEXT:
            return
        if self.tag_name == self.lasttag:
            self.value.append(data)

    def get_values(self):
        """Return the list of values collected so far by this instance."""
        return self.value
